require(Seurat)
require(DT) # for datatable
require(SeuratWrappers) # for fastMNN
require(harmony) # for RunHarmony
require(reshape2) # for dcast
ImportTable <- function(file, header = T, row.names = 1, sep = ',', check.names = FALSE, ...){
data <- read.csv(file = file, header = header, row.names = row.names, sep = sep, check.names = check.names, ...)
return(data)
}
show_table <- function(df, rownames = T, filter="top", options = list(pageLength = 10, scrollX=T), ...){
if (ncol(df) > 50) {
df <- df[, 1:50]
message('Due to the column dim is > 50, it only shows the ')
}
datatable(df, rownames = rownames,
filter=filter, options = options, ... )
}
MultMedFC: Multiple Median Fold Change, compared each samples between 2 Stages (Control vs CRC)
MultMedFC <- function(data1, data2, FeatureAsRow = Tsf){
if (FeatureAsRow) {
data1 <- t(data1)
data2 <- t(data2)
}
data1 <- as.data.frame(data1)
data2 <- as.data.frame(data2)
if (! all(colnames(data1) == colnames(data2))) {
message("rowname of data1 and data2 is not fullly same!! plz check it !!")
return()
}
sf_df <- matrix(NA, ncol = 3); sf_df <- sf_df[-1, ]
for (sf in colnames(data1)) {
sf_FC <- NULL
if (sum(data1[[sf]] == 0) == nrow(data1) & sum(data2[[sf]] == 0) == nrow(data2)) {
sf_df <- rbind(sf_df, c(1,1,1))
}else{
min_value <- min(c(data1[[sf]][data1[[sf]] != 0], data2[[sf]][data2[[sf]] != 0]))/10
data1[[sf]][data1[[sf]] == 0] <- min_value
data2[[sf]][data2[[sf]] == 0] <- min_value
for (i1 in 1:nrow(data1)) {
for (i2 in 1:nrow(data2)) {
sf_FC <- c(sf_FC, data1[[sf]][i1]/data2[[sf]][i2])
}
}
sf_wil <- wilcox.test(sf_FC, conf.int = TRUE)
sf_df <- rbind(sf_df, c(as.numeric(sf_wil$estimate), as.numeric(sf_wil$conf.int)))
}
}
sf_df <- as.data.frame(sf_df); rownames(sf_df) <- colnames(data1); colnames(sf_df) <- c("Median", 'low-CI', 'high-CI')
return(sf_df)
}
FileCreate <- function(DirPath = "./",Prefix = "ExampleTest", Suffix = "pdf", version = "0.0.0"){
date=as.character(Sys.Date())
DirPath = gsub("/$","",DirPath)
Suffix = gsub('^[.]',"",Suffix)
if(! dir.exists(DirPath)){
dir.create(DirPath,recursive =TRUE)
}
if (version == "0.0.0") {
version=100
while(TRUE){
version_=paste(unlist(strsplit(as.character(version),"")),collapse=".")
DirPathName = paste0(DirPath,"/",date,'-',Prefix,'-v',version_,".",Suffix)
if(! file.exists(DirPathName)){
return(DirPathName)
break
}
version = version + 1
}
}else{
DirPathName = paste0(DirPath,"/",date,'-',Prefix,'-v',version_,".",Suffix)
if(! dir.exists(DirPathName)){
return(DirPathName)
}
return(DirPathName)
}
}
summarized_df <- ImportTable(file = '../01.SSTF/2021-07-07-Summary-walsh-median-v1.0.0.csv')
sel_df <- summarized_df[(summarized_df$P.count == 8 | summarized_df$N.count == 8), ] # 6
sel2_df <- summarized_df[(summarized_df$P.count >= 7 | summarized_df$N.count >= 7), ] # 66
sel3_df <- summarized_df[(summarized_df$P.count >= 6 | summarized_df$N.count >= 6), ] # 260
show_table(summarized_df) %>%
formatSignif(columns = colnames(summarized_df[,c(5:20)]),
digits = 3, interval = 1) %>%
formatRound(columns = colnames(summarized_df[,c(1:4)]),
digits = 0, interval = 1)
tax_name <- ImportTable(file = '../00.ProcessData/2021-04-12-allTaxonomySplitLevel-v1.0.1.tsv', sep = '\t')
tax_name$new_name <- gsub(pattern = 's__', replacement = '', x = tax_name$Specie) %>%
gsub('_', ' ', . ) # due to Feature names of CreateSeuratObject cannot have underscores, replace with blanket space.
show_table(tax_name)
raw_tax_df <- ImportTable(file = '../00.ProcessData/2021-04-12-ReAbun-Fungi-filter1802-v1.0.0.tsv', sep = '\t')
# tax_df <- ImportTable(file = '../00.ProcessData/2021-04-12-RawData-Fungi-filter1802-v1.0.0.tsv', sep = '\t')
rownames(raw_tax_df) <- tax_name[rownames(raw_tax_df), "new_name"] # use the short species names
# show_table(raw_tax_df)
tax_df <- raw_tax_df[rownames(sel3_df),]
show_table(tax_df) %>%
formatSignif(columns = colnames(tax_df)[1:50],
digits = 3, interval = 1)
## Due to the column dim is > 50, it only shows the
meta_df <- ImportTable(file = '../00.ProcessData/metaInfo-subgroup-v4.1.csv')
meta_df <- meta_df[colnames(tax_df),]
show_table(meta_df)
CreateSeuratObject: Create a Seurat object link
s_obj <- CreateSeuratObject(counts = tax_df, meta.data = meta_df)
# split the object by cohorts
s_list <- SplitObject(s_obj, split.by = 'Cohort')
Fast integration using reciprocal PCA (RPCA) link
calculateORnot <- T
if (calculateORnot) {
for (i in 1:length(s_list)) {
s_list[[i]] <- NormalizeData(s_list[[i]], verbose = FALSE)
s_list[[i]] <- FindVariableFeatures(s_list[[i]], selection.method = "vst", nfeatures = 2000, verbose = FALSE)
}
s_anchors <- FindIntegrationAnchors(object.list = s_list, k.anchor = 20)
s_integrated <- IntegrateData(anchorset = s_anchors, k.weight = 10) %>%
ScaleData(. , verbose = FALSE) %>%
RunPCA(. , npcs = 30, verbose = FALSE) %>%
RunUMAP(. , reduction = "pca", dims = 1:30, verbose = FALSE)
prca_obj <- FindNeighbors(s_integrated, dims = 1:30) %>%
FindClusters()
prca_table <- dcast(as.data.frame(table(prca_obj@active.ident,prca_obj@meta.data[["Stage"]])), Var1 ~ Var2)
rownames(prca_table) <- prca_table$Var1; prca_table <- prca_table[,-1]
mod_file <- FileCreate(DirPath = '../02.BatchEffect/RPCA', Prefix = 'RPCA-model', Suffix = 'rds')
saveRDS(object = prca_obj, file = mod_file)
tab_file <- FileCreate(DirPath = '../02.BatchEffect/RPCA', Prefix = 'RPCA-summary', Suffix = 'csv')
write.csv(x = prca_table, file = tab_file)
}else{
prca_obj <- readRDS(file = '../02.BatchEffect/RPCA/2021-07-09-RPCA-model-v1.0.0.rds')
prca_table <- ImportTable(file = '../02.BatchEffect/RPCA/2021-07-09-RPCA-summary-v1.0.0.csv')
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1802
## Number of edges: 72235
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7232
## Number of communities: 7
## Elapsed time: 0 seconds
show_table(prca_table)
DimPlot(prca_obj, group.by = c("Cohort", "Stage", 'ident'), ncol = 1)
fastMNN: Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors link
Nature Biotechnology, 2018, DOI: 10.1038/nbt.4091
calculateORnot <- T
if (calculateORnot) {
fMNN_obj <- NormalizeData(s_obj) %>%
FindVariableFeatures() %>%
SplitObject(. , split.by = 'Cohort') %>%
RunFastMNN()
fMNN_obj2 <- RunUMAP(fMNN_obj, reduction = 'mnn', dims = 1:30) %>%
FindNeighbors(. , reduction = 'mnn', dims = 1:30) %>%
FindClusters()
fMNN_table <- dcast(as.data.frame(table(fMNN_obj2@active.ident,fMNN_obj2@meta.data[["Stage"]])), Var1 ~ Var2)
rownames(fMNN_table) <- fMNN_table$Var1; fMNN_table <- fMNN_table[,-1]
mod_file <- FileCreate(DirPath = '../02.BatchEffect/fastMNN', Prefix = 'fastMNN-model', Suffix = 'rds')
saveRDS(object = fMNN_obj2, file = mod_file)
tab_file <- FileCreate(DirPath = '../02.BatchEffect/fastMNN', Prefix = 'fastMNN-summary', Suffix = 'csv')
write.csv(x = fMNN_table, file = tab_file)
}else{
fMNN_obj2 <- readRDS(file = '../02.BatchEffect/fastMNN/2021-07-09-fastMNN-model-v1.0.0.rds')
fMNN_table <- ImportTable(file = '../02.BatchEffect/fastMNN/2021-07-09-fastMNN-summary-v1.0.0.csv')
}
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from mnn.reconstructed_ to mnnreconstructed_
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1802
## Number of edges: 79262
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6388
## Number of communities: 7
## Elapsed time: 0 seconds
show_table(fMNN_table)
DimPlot(fMNN_obj2, group.by = c("Cohort", "Stage", 'ident'), ncol = 1)
Fast, sensitive, and flexible integration of single cell data with Harmony. link
Nature Methods, 2019, DOI: 10.1038/s41592-019-0619-0
calculateORnot <- T
if (calculateORnot) {
hmy_obj <- NormalizeData(s_obj) %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA(. , verbose = FALSE)
hmy_obj2 <- RunHarmony(object = hmy_obj, group.by.vars = 'Cohort') %>%
RunUMAP(., reduction = "harmony", dims = 1:30) %>%
FindNeighbors(., reduction = "harmony", dims = 1:30) %>%
FindClusters()
hmy_table <- dcast(as.data.frame(table(hmy_obj2@active.ident,hmy_obj2@meta.data[["Stage"]])), Var1 ~ Var2)
rownames(hmy_table) <- hmy_table$Var1; hmy_table <- hmy_table[,-1]
mod_file <- FileCreate(DirPath = '../02.BatchEffect/Harmony', Prefix = 'Harmony-model', Suffix = 'rds')
saveRDS(object = hmy_obj2, file = mod_file)
tab_file <- FileCreate(DirPath = '../02.BatchEffect/Harmony', Prefix = 'Harmony-summary', Suffix = 'csv')
write.csv(x = hmy_table, file = tab_file)
}else{
hmy_obj2 <- readRDS(file = '../02.BatchEffect/Harmony/2021-07-09-Harmony-model-v1.0.0.rds')
hmy_table <- ImportTable(file = '../02.BatchEffect/Harmony/2021-07-09-Harmony-summary-v1.0.0.csv')
}
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1802
## Number of edges: 83057
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7194
## Number of communities: 8
## Elapsed time: 0 seconds
show_table(hmy_table)
DimPlot(hmy_obj2, group.by = c("Cohort", "Stage", 'ident'), ncol = 1)